Intro

This document serves as a supplementary material for my thesis demonstrating some of the scripting done to achieve the presented results. It should allow anyone to reproduce the data and evaluate the results for themselves.

Setup

Load libraries, set working directory and seed

## Warning: package 'flowCore' was built under R version 4.1.1
## Warning: package 'rjson' was built under R version 4.1.2
## Warning: package 'flowWorkspace' was built under R version 4.1.1
## As part of improvements to flowWorkspace, some behavior of
## GatingSet objects has changed. For details, please read the section
## titled "The cytoframe and cytoset classes" in the package vignette:
## 
##   vignette("flowWorkspace-Introduction", "flowWorkspace")
## Warning: package 'FNN' was built under R version 4.1.3
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'EmbedSOM' was built under R version 4.1.3
## Warning: package 'gridExtra' was built under R version 4.1.2
## Warning: package 'cowplot' was built under R version 4.1.2
## Warning: package 'ggpubr' was built under R version 4.1.3
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
## Warning: package 'scales' was built under R version 4.1.3

Set working directory to the location of this notebook.

try({
  dr = getSrcDirectory()[1]
})
## Error in getSrcFilename(x, full.names = TRUE, unique = unique) : 
##   argument "x" is missing, with no default
#when sourcing the file
try({
  dr = dirname(rstudioapi::getActiveDocumentContext()$path)
})
#for running code directly in rstudio
setwd(dr) #set for usage as a script/running lines
knitr::opts_knit$set(root.dir = dr) #set for R markdown chunk execution

Define functions

Artificial data pipeline

Load and parse spectra from json

spctr = fromJSON(file = "panel1_lymph_subtracted_fixed.json")
spectra = matrix(ncol = 64, nrow = length(spctr))
spec_names = c()
for (i in 1:length(spctr)) {
  spectra[i, ] = spctr[[i]]$spectrum$mS
  spec_names = c(spec_names, spctr[[i]]$antigen)
}
colnames(spectra) = spctr[[1]]$spectrum$channels
rm(spctr)
rownames(spectra) = spec_names
nspectra = nrow(spectra)
ndetects = ncol(spectra)

Load and parse phenotype table

Unpack the tree-like structure of the phenotype table using previously defined recursive function.

pheno = read.csv('phenotypes_noAFonly.csv',
                 sep = ',',
                 fileEncoding = "UTF-8-BOM")
todel = c()
pheno = recur_unpack(c('base'), pheno, todel)
#write.csv(pheno,'pheno_unpacked.csv') #if you are interested in unpacked phenotypes uncomment this

Comparison of unmixing methods

MSE based comparison

  1. Compute nougad unmixing

Change nthreads parameter appropriately up to the number of threads available to your machine.

stronk = c(2, 3, 9, 14, 17, 21)
res = generate_artificial_cytometry_data(spectra, pheno, 5000, stronk)
received = res[[1]]
gt = res[[2]]
nr = res[[3]]
len_umx = length(gt)
len_rec = length(received)
start.time = Sys.time()
ngd = nougadmt(
  received,
  spectra = spectra,
  snw = 5,
  spw = 1,
  nw = 200,
  start = 2,
  iters = 500,
  alpha = 0.01,
  nthreads = detectCores()
)$unmixed
stop.time = Sys.time()
cat('Multithreaded nougad: ')
## Multithreaded nougad:
stop.time - start.time
## Time difference of 0.498981 secs

If you want to compare performance with the singlethreaded implementation.

start.time = Sys.time()
ngd = nougad(
  received,
  spectra = spectra,
  snw = 5,
  spw = 1,
  nw = 200,
  start = 2,
  iters = 500,
  alpha = 0.01
)$unmixed
stop.time = Sys.time()
cat('Singlehreaded nougad: ')
## Singlehreaded nougad:
stop.time - start.time
## Time difference of 4.479315 secs

(Speed up from going multithreaded is almost 20x on my Ryzen 9 5900X CPU with 12cores/24threads and 64GB of RAM. For 100000 cells this means going from ~112s to ~5.7s which is significant.)

  1. Compute OLS unmixing
ols = t(lm(t(received) ~ t(spectra) + 0)$coefficients)
  1. Compute weighted OLS
wols = ols
for (i in 1:nr) {
  ws = norm(pmax(0, received[i, ] / sum(received[i, ])))
  wols[i, ] = t(lm(t(received[i, , drop = F]) ~ t(spectra) + 0, weights =
                     ws)$coefficients)
}
  1. Compute MSE against ground truth
ngd_t = trans(ngd)
ols_t = trans(ols)
wols_t = trans(wols)
gt_t = trans(gt)
ngd_diff = mse(gt_t, ngd_t)
wols_diff = mse(gt_t, wols_t)
ols_diff = mse(gt_t, ols_t)
cat(paste0('MSE for NGD: ', ngd_diff , '\n'))
## MSE for NGD: 0.762993576176325
cat(paste0('MSE for WOLS: ', wols_diff , '\n'))
## MSE for WOLS: 1.88047084606996
cat(paste0('MSE for OLS: ', ols_diff , '\n'))
## MSE for OLS: 1.24559027180041

Compare error distributions

  1. Compare error distribution shape and magnitude over different methods.
ngd_err = (ngd_t - gt_t) ^ 2
ols_err = (ols_t - gt_t) ^ 2
wols_err = (wols_t - gt_t) ^ 2
x = 1:len_umx
par(
  mfrow = c(3, 1),
  tcl = 0.5,
  mgp = c(0, -1.4, 0),
  mar = c(0, 0, 1.5, 0)
)
plot(
  x,
  ngd_err,
  type = 'l',
  col = 'red',
  pch = 16,
  cex = .1,
  ylim = c(0, 50),
  main = paste0('NOUGAD MSE=', round(ngd_diff, 4), ' SD_mse=', round(sd(ngd_err), 4)),
  xaxt = 'n'
)
plot(
  x,
  wols_err,
  type = 'l',
  col = 'red',
  pch = 16,
  cex = .1,
  ylim = c(0, 50),
  main = paste0('WOLS MSE=', round(wols_diff, 4), ' SD_mse=', round(sd(wols_err), 4)),
  xaxt = 'n'
)
plot(
  x,
  ols_err,
  type = 'l',
  col = 'red',
  pch = 16,
  cex = .1,
  ylim = c(0, 50),
  main = paste0('OLS MSE=', round(ols_diff, 4), ' SD_mse=', round(sd(ols_err), 4)),
  xaxt = 'n'
)

  1. Compare error distribution of mean per cell errors over all methods (Note the “gap” with near 0 errors in all methods - this where dead cells with ~0 expression in all spectra are.)
res_1k = generate_artificial_cytometry_data(spectra, pheno, 1000, stronk)
received_1k = res_1k[[1]]
gt_1k = res_1k[[2]]
nr_1k = res_1k[[3]]
ngd_1k = nougadmt(
  received_1k,
  spectra = spectra,
  snw = 5,
  spw = 1,
  nw = 200,
  start = 2,
  iters = 500,
  alpha = 0.01,
  nthreads=detectCores()
)$unmixed

ols_1k = t(lm(t(received_1k) ~ t(spectra) + 0)$coefficients)
wols_1k = ols_1k
for (i in 1:nr_1k) {
  ws = norm(pmax(0, received_1k[i,] / sum(received_1k[i,])))
  wols_1k[i,] = t(lm(t(received_1k[i, , drop = F]) ~ t(spectra) + 0, weights =
                       ws)$coefficients)
}


ngd_t_1k = trans(ngd_1k)
ols_t_1k = trans(ols_1k)
wols_t_1k = trans(wols_1k)
gt_t_1k = trans(gt_1k)

ngd_err_1k = (ngd_t_1k - gt_t_1k) ^ 2
ols_err_1k = (ols_t_1k - gt_t_1k) ^ 2
wols_err_1k = (wols_t_1k - gt_t_1k) ^ 2

total_brightness_1k = rowSums(received_1k)
ngd_mcr_1k = rowSums(ngd_err_1k) / nspectra
ols_mcr_1k = rowSums(ols_err_1k) / nspectra
wols_mcr_1k = rowSums(wols_err_1k) / nspectra
rnms_1k = rownames(received_1k)
rnms_1k = sapply(strsplit(rnms_1k, split = '.', fixed = TRUE), function(x)
  (x[1]))
uq = unique(rnms_1k)
occurs = Vectorize(grep, 'pattern')(paste0('^', uq, '$'), rnms_1k)
cnts = as.vector(sapply(occurs, length))
coords = c(0)
for (i in 1:length(cnts))
{
  coords[i + 1] = sum(cnts[1:i])
}
coords = coords[-length(coords)]
end_point = 0.5 + length(uq) + length(uq) - 1 #this is the line which does the trick (together with barplot "space = 1" parameter)

x = 1:nr_1k
par(
  mfrow = c(3, 1),
  tcl = 0.5,
  mgp = c(0, -1.4, 0),
  mar = c(0, 0, 1.5, 0)
)
plot(
  x,
  ngd_mcr_1k,
  pch = 16,
  cex = .1,
  ylim = c(0, 15),
  main = 'NOUGAD',
  type = 'l',
  col = 'red',
  xaxt = 'n',
  yaxt = 'n'
)
lines(x, norm(total_brightness_1k) * 15 , col = alpha('green', 0.5))
legend(
  round(nr_1k - 1 / 7 * nr_1k),
  15,
  legend = c("Squared error", "Cell luminance"),
  col = c("red", "green"),
  lty = 1:2
)
axis(2,
     at = seq(0, 15, 2),
     labels = T,
     gap.axis = 0)
plot(
  x,
  wols_mcr_1k,
  pch = 16,
  cex = .1,
  ylim = c(0, 15),
  main = 'WOLS',
  type = 'l',
  col = 'red',
  xaxt = 'n',
  xlab = '',
  yaxt = 'n'
)
lines(x, norm(total_brightness_1k) * 15 , col = alpha('green', 0.5))
text(
  coords,
  par("usr")[3] + 15,
  srt = 90,
  adj = 1,
  xpd = TRUE,
  labels = uq,
  cex = 0.9
)
axis(1,
     gap.axis = 1,
     padj = 0.5,
     xaxt = 'n')
axis(2,
     at = seq(0, 15, 2),
     labels = T,
     gap.axis = 0)
plot(
  x,
  ols_mcr_1k,
  pch = 16,
  cex = .1,
  ylim = c(0, 15),
  main = 'OLS',
  type = 'l',
  col = 'red',
  xaxt = 'n',
  yaxt = 'n'
)
lines(x, norm(total_brightness_1k) * 15 , col = alpha('green', 0.5))
axis(2,
     at = seq(0, 15, 2),
     labels = T,
     gap.axis = 0)

Correlation of mean per cell error with total cell brightness:

ngd_err = (ngd_t - gt_t) ^ 2
ols_err = (ols_t - gt_t) ^ 2
wols_err = (wols_t - gt_t) ^ 2

total_brightness = rowSums(received)
ngd_mcr = rowSums(ngd_err) / nspectra
ols_mcr = rowSums(ols_err) / nspectra
wols_mcr = rowSums(wols_err) / nspectra

df_ngd = data.frame(
  cell_brightness = norm(total_brightness),
  mean_cell_error = norm(ngd_mcr)
)
df_ngd = df_ngd[order(df_ngd$mean_cell_error),]
df_ols = data.frame(
  cell_brightness = norm(total_brightness),
  mean_cell_error = norm(ols_mcr)
)
df_ols = df_ols[order(df_ols$mean_cell_error),]
df_wols = data.frame(
  cell_brightness = norm(total_brightness),
  mean_cell_error = norm(wols_mcr)
)
df_wols = df_wols[order(df_wols$mean_cell_error),]

cn = cor.test(df_ngd$cell_brightness, df_ngd$mean_cell_error)
co = cor.test(df_ols$cell_brightness, df_ols$mean_cell_error)
cw = cor.test(df_wols$cell_brightness, df_wols$mean_cell_error)

par(mar = c(0, 0, 3, 0), mfrow = c(1, 3))
plot(
  df_ngd$cell_brightness,
  df_ngd$mean_cell_error,
  pch = 16,
  cex = .1,
  main = paste0(
    'NOUGAD r=',
    round(cn$estimate, 3),
    ' 95CI=(',
    round(cn$conf.int[1], 2),
    ';',
    round(cn$conf.int[2], 2),
    ')\n p',
    format.pval(cn$p.value)
  ),
  xaxt = 'n',
  yaxt = 'n'
)
abline(0, 1, col = 'red')
plot(
  df_wols$cell_brightness,
  df_wols$mean_cell_error,
  pch = 16,
  cex = .1,
  main = paste0(
    'WOLS r=',
    round(cw$estimate, 3),
    ' 95CI=(',
    round(cw$conf.int[1], 2),
    ';',
    round(cw$conf.int[2], 2),
    ')\n p',
    format.pval(cw$p.value)
  ),
  xaxt = 'n',
  yaxt = 'n'
)
abline(0, 1, col = 'red')
plot(
  df_ols$cell_brightness,
  df_ols$mean_cell_error,
  pch = 16,
  cex = .1,
  main = paste0(
    'OLS r=',
    round(co$estimate, 3),
    ' 95CI=(',
    round(co$conf.int[1], 2),
    ';',
    round(co$conf.int[2], 2),
    ')\n p',
    format.pval(co$p.value)
  ),
  xaxt = 'n',
  yaxt = 'n'
)
abline(0, 1, col = 'red')

  1. Compare error distribution over different spectra for different methods This should serve to look at which spectra are more prone to errors and how/if the error distribution for each spectrum changes depending on the selected method. Note that counts (Y axis) are log scaled.
spec_int = norm(rowSums(spectra) ^ 2) #Total energy of the given spectrum
mean_ag_expr = norm(colSums(gt) / nspectra)#Mean expression of that spectrum in the data (ground truth)
colvec <- colorRampPalette(c("green", "yellow", "red"))
cvc = c(colvec(10), rep('red', 10))
for (i in 1:nspectra) {
  par(mar = c(0, 2, 2, 0), mfrow = c(1, 3))
  ols_vec = ols_err[, i]
  wols_vec = wols_err[, i]
  ngd_vec = ngd_err[, i]
  xmx = max(c(max(ols_vec), max(wols_vec), max(ngd_vec)))
  h_ols =
    hist(ols_vec,
         plot = FALSE,
         breaks = seq(0, xmx, length.out = 20))
  h_ols$counts = log(h_ols$counts)
  h_ols$counts[h_ols$counts == -Inf] = 0
  h_wols =
    hist(wols_vec,
         plot = FALSE,
         breaks = seq(0, xmx, length.out = 20))
  h_wols$counts = log(h_wols$counts)
  h_wols$counts[h_wols$counts == -Inf] = 0
  h_ngd =
    hist(ngd_vec,
         plot = FALSE,
         breaks = seq(0, xmx, length.out = 20))
  h_ngd$counts = log(h_ngd$counts)
  h_ngd$counts[h_ngd$counts == -Inf] = 0
  ymx = max(c(max(h_ols$counts), max(h_wols$counts), max(h_ngd$counts)))
  plot(
    h_ngd,
    col = cvc,
    main = paste0(
      spec_names[i],
      ' NGD\n E=',
      round(spec_int[i], 3),
      ' M(Ex)=',
      round(mean_ag_expr[i], 3)
    ),
    xaxt = 'n',
    xlim = c(0, xmx),
    ylim = c(0, ymx)
  )
  plot(
    h_wols,
    col = cvc,
    main = paste0(
      spec_names[i],
      ' WOLS\n E=',
      round(spec_int[i], 3),
      ' M(Ex)=',
      round(mean_ag_expr[i], 3)
    ),
    xaxt = 'n',
    xlim = c(0, xmx),
    ylim = c(0, ymx),
    yaxt = 'n'
  )
  plot(
    h_ols,
    col = cvc,
    main = paste0(
      spec_names[i],
      ' OLS\n E=',
      round(spec_int[i], 3),
      ' M(Ex)r=',
      round(mean_ag_expr[i], 3)
    ),
    xaxt = 'n',
    xlim = c(0, xmx),
    ylim = c(0, ymx),
    yaxt = 'n'
  )
  
}

Correlation of mean spectrum error with the spectrum energy.

ngd_msr = colSums(ngd_err) / nr
ols_msr = colSums(ols_err) / nr
wols_msr = colSums(wols_err) / nr

df_ngd = data.frame(spectrum_energy = norm(spec_int),
                    mean_spectrum_error = norm(ngd_msr))
df_ngd = df_ngd[order(df_ngd$mean_spectrum_error),]
df_ols = data.frame(spectrum_energy = norm(spec_int),
                    mean_spectrum_error = norm(ols_msr))
df_ols = df_ols[order(df_ols$mean_spectrum_error),]
df_wols = data.frame(spectrum_energy = norm(spec_int),
                     mean_spectrum_error = norm(wols_msr))
df_wols = df_wols[order(df_wols$mean_spectrum_error),]

cn = cor.test(ngd_msr, spec_int)
co = cor.test(ols_msr, spec_int)
cw = cor.test(wols_msr, spec_int)

par(mar = c(0, 0, 3, 0), mfrow = c(1, 3))
plot(
  df_ngd$spectrum_energy,
  df_ngd$mean_spectrum_error,
  pch = 16,
  cex = .1,
  main = paste0(
    'NOUGAD r=',
    round(cn$estimate, 3),
    ' 95CI=(',
    round(cn$conf.int[1], 2),
    ';',
    round(cn$conf.int[2], 2),
    ')\n p=',
    format.pval(cn$p.value)
  ),
  xaxt = 'n',
  yaxt = 'n'
)
abline(0, 1, col = 'red')
plot(
  df_wols$spectrum_energy,
  df_wols$mean_spectrum_error,
  pch = 16,
  cex = .1,
  main = paste0(
    'WOLS r=',
    round(cw$estimate, 3),
    ' 95CI=(',
    round(cw$conf.int[1], 2),
    ';',
    round(cw$conf.int[2], 2),
    ')\n p=',
    format.pval(cw$p.value)
  ),
  xaxt = 'n',
  yaxt = 'n'
)
abline(0, 1, col = 'red')
plot(
  df_ols$spectrum_energy,
  df_ols$mean_spectrum_error,
  pch = 16,
  cex = .1,
  main = paste0(
    'OLS r=',
    round(co$estimate, 3),
    ' 95CI=(',
    round(co$conf.int[1], 2),
    ';',
    round(co$conf.int[2], 2),
    ')\n p=',
    format.pval(co$p.value)
  ),
  xaxt = 'n',
  yaxt = 'n'
)
abline(0, 1, col = 'red')

Visually compare unmixed cells to ground truth in 2D space

  1. Compare selected marker pairs on 2D plots

Using lines.

colnames(gt_t_1k) = spec_names
colnames(ngd_t_1k) = spec_names
colnames(ols_t_1k) = spec_names
colnames(wols_t_1k) = spec_names

ngd_t_1k = as.data.frame(ngd_t_1k)
gt_t_1k = as.data.frame(gt_t_1k)
ols_t_1k = as.data.frame(ols_t_1k)
wols_t_1k = as.data.frame(wols_t_1k)

marker_cords = which(mean_ag_expr > 0.3)
markers = spec_names[marker_cords]
markers = markers[markers != 'Autofluorescence']
combos = combn(markers, 2)
for (i in 1:ncol(combos)) {
  df_ngd = data.frame(
    X1 = gt_t_1k[combos[1, i]][, 1],
    X2 = gt_t_1k[combos[2, i]][, 1],
    X1.1 = ngd_t_1k[combos[1, i]][, 1],
    X2.1 = ngd_t_1k[combos[2, i]][, 1]
  )
  df_ols = data.frame(
    X1 = gt_t_1k[combos[1, i]][, 1],
    X2 = gt_t_1k[combos[2, i]][, 1],
    X1.1 = ols_t_1k[combos[1, i]][, 1],
    X2.1 = ols_t_1k[combos[2, i]][, 1]
  )
  df_wols = data.frame(
    X1 = gt_t_1k[combos[1, i]][, 1],
    X2 = gt_t_1k[combos[2, i]][, 1],
    X1.1 = wols_t_1k[combos[1, i]][, 1],
    X2.1 = wols_t_1k[combos[2, i]][, 1]
  )
  
  p1 = ggplot()  + geom_segment(
    data = df_ngd,
    aes(
      x = X1,
      y = X2,
      xend = X1.1,
      yend = X2.1
    ),
    colour = "red",
    alpha = 0.5
  ) + geom_point(aes(x = df_ngd$X1, y = df_ngd$X2),
                 size = 0.5,
                 alpha = 0.3) + theme_cowplot() + ylab(combos[2, i]) + xlab('') + ggtitle('NGD')
  p2 = ggplot()  + geom_segment(
    data = df_wols,
    aes(
      x = X1,
      y = X2,
      xend = X1.1,
      yend = X2.1
    ),
    colour = "red",
    alpha = 0.5
  ) + geom_point(aes(x = df_ngd$X1, y = df_ngd$X2),
                 size = 0.5,
                 alpha = 0.3) + theme_cowplot() + ylab('') + xlab(combos[1, i]) + ggtitle('WOLS')
  p3 = ggplot()  + geom_segment(
    data = df_ols,
    aes(
      x = X1,
      y = X2,
      xend = X1.1,
      yend = X2.1
    ),
    colour = "red",
    alpha = 0.5
  ) + geom_point(aes(x = df_ngd$X1, y = df_ngd$X2),
                 size = 0.5,
                 alpha = 0.3) + theme_cowplot() + ylab('') + xlab('') + ggtitle('OLS')
  
  print(ggarrange(p1, p2, p3, ncol = 3))
  
}

Color cells by squared error magnitude.

colnames(gt_t) = spec_names
colnames(ngd_t) = spec_names
colnames(ols_t) = spec_names
colnames(wols_t) = spec_names

ngd_t = as.data.frame(ngd_t)
gt_t = as.data.frame(gt_t)
ols_t = as.data.frame(ols_t)
wols_t = as.data.frame(wols_t)


ngd_err = as.data.frame(ngd_err)
colnames(ngd_err) = spec_names
ols_err = as.data.frame(ols_err)
colnames(ols_err) = spec_names
wols_err = as.data.frame(wols_err)
colnames(wols_err) = spec_names
for (i in 1:ncol(combos)) {
  aes_engd = rowSums(ngd_err[, c(combos[1, i], combos[2, i])]) / 2
  aes_eols = rowSums(ols_err[, c(combos[1, i], combos[2, i])]) / 2
  aes_ewols = rowSums(wols_err[, c(combos[1, i], combos[2, i])]) / 2
  mx = max(c(max(aes_engd), max(aes_eols), max(aes_ewols)))
  x_gt = gt_t[combos[1, i]][, 1]
  y_gt = gt_t[combos[2, i]][, 1]
  x_ngd = ngd_t[combos[1, i]][, 1]
  y_ngd = ngd_t[combos[2, i]][, 1]
  x_ols = ols_t[combos[1, i]][, 1]
  y_ols = ols_t[combos[2, i]][, 1]
  x_wols = wols_t[combos[1, i]][, 1]
  y_wols = wols_t[combos[2, i]][, 1]
  mx_x = max(c(max(x_ols), max(x_wols), max(x_ngd), max(x_gt)))
  mx_y = max(c(max(y_ols), max(y_wols), max(y_ngd), max(y_gt)))
  mn_x = min(c(min(x_ols), min(x_wols), min(x_ngd), min(x_gt)))
  mn_y = min(c(min(y_ols), min(y_wols), min(y_ngd), min(y_gt)))
  Error = rep(0, nr)
  
  p1 = qplot() + geom_point(aes(x_gt, y_gt, colour = Error),
                            size = 1,
                            alpha = 0.5) + scale_colour_gradientn(
                              limits = c(0, mx),
                              trans = 'pseudo_log',
                              colours = c("blue", "yellow", "red"),
                              labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                              breaks = c(0, 2, 5, 10, 20, 40, 60)
                            ) + ggtitle('Ground Truth') + theme_cowplot() +
    ylab('') + xlab('') + scale_x_continuous(limits = c(mn_x, mx_x)) + scale_y_continuous(limits = c(mn_y, mx_y)) +
    theme_void()
  p2 = qplot() + geom_point(aes(x_ngd, y_ngd, colour = aes_engd),
                            size = 1,
                            alpha = 0.5) + scale_colour_gradientn(
                              limits = c(0, mx),
                              trans = 'pseudo_log',
                              colours = c("blue", "yellow", "red"),
                              labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                              breaks = c(0, 2, 5, 10, 20, 40, 60)
                            ) + ggtitle('NGD') + theme_cowplot() +
    ylab('') + xlab('') + scale_x_continuous(limits = c(mn_x, mx_x)) + scale_y_continuous(limits = c(mn_y, mx_y)) +
    theme_void()
  p3 = qplot() + geom_point(aes(x_wols, y_wols, colour = aes_ewols),
                            size = 1,
                            alpha = 0.5) + scale_colour_gradientn(
                              limits = c(0, mx),
                              trans = 'pseudo_log',
                              colours = c("blue", "yellow", "red"),
                              labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                              breaks = c(0, 2, 5, 10, 20, 40, 60)
                            ) + ggtitle('WOLS') + theme_cowplot() +
    ylab(combos[2, i]) + xlab(combos[1, i]) + scale_x_continuous(limits = c(mn_x, mx_x)) +
    scale_y_continuous(limits = c(mn_y, mx_y)) + theme_void()
  p4 = qplot() + geom_point(aes(x_ols, y_ols, colour = aes_eols),
                            size = 1,
                            alpha = 0.5) + scale_colour_gradientn(
                              limits = c(0, mx),
                              trans = 'pseudo_log',
                              colours = c("blue", "yellow", "red"),
                              labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                              breaks = c(0, 2, 5, 10, 20, 40, 60)
                            ) + ggtitle('OLS') + theme_cowplot() +
    ylab('') + xlab('') + scale_x_continuous(limits = c(mn_x, mx_x)) + scale_y_continuous(limits = c(mn_y, mx_y)) +
    theme_void()
  figure = ggarrange(
    p1,
    p2,
    p3,
    p4,
    ncol = 2,
    nrow = 2,
    common.legend = T,
    legend = 'right'
  ) + theme(plot.background = element_rect())
  print(annotate_figure(
    figure,
    left = text_grob(combos[2, i], rot = 90),
    bottom = text_grob(paste0(combos[1, i], '\n', '\n'))
  ))
}

  1. Compare points across all markers embedded and visualized in 2D using self-organizing map from EmbedSOM library

Using lines.

map = SOM(gt_t, xdim = 20, ydim = 20)
e_gt = EmbedSOM(gt_t, map)
e_ngd = EmbedSOM(ngd_t, map)
e_ols = EmbedSOM(ols_t, map)
e_wols = EmbedSOM(wols_t, map)
p1 = ggplot()  + geom_segment(
  aes(
    x = e_gt[, 1],
    y = e_gt[, 2],
    xend = e_ngd[, 1],
    yend = e_ngd[, 2]
  ),
  colour = "red",
  alpha = 0.3
) + geom_point(aes(x = e_gt[, 1], y = e_gt[, 2]), size = 0.4, alpha = 0.3) +
  ggtitle('NGD') + scale_x_discrete(labels = NULL, breaks = NULL) + labs(x = '', y =
                                                                           '') + scale_y_discrete(labels = NULL, breaks = NULL) + theme_void()
p2 = ggplot()  + geom_segment(
  aes(
    x = e_gt[, 1],
    y = e_gt[, 2],
    xend = e_wols[, 1],
    yend = e_wols[, 2]
  ),
  colour = "red",
  alpha = 0.3
) + geom_point(aes(x = e_gt[, 1], y = e_gt[, 2]), size = 0.5, alpha = 0.4) +
  ggtitle('WOLS') + scale_x_discrete(labels = NULL, breaks = NULL) + labs(x = '', y =
                                                                            '') + scale_y_discrete(labels = NULL, breaks = NULL) + theme_void()
p3 = ggplot()  + geom_segment(aes(
  e_gt[, 1],
  y = e_gt[, 2],
  xend = e_ols[, 1],
  yend = e_ols[, 2]
),
colour = "red",
alpha = 0.3) + geom_point(aes(x = e_gt[, 1], y = e_gt[, 2]), size = 0.5, alpha =
                            0.4) + ggtitle('OLS') + scale_x_discrete(labels = NULL, breaks = NULL) + labs(x = '', y =
                                                                                                            '') + scale_y_discrete(labels = NULL, breaks = NULL) + theme_void()

p1

p2

p3

ggarrange(p1, p2, p3, ncol = 3)

Color points by per-cell mean squared error.

Error = rep(0, nr)
p1 = qplot() + geom_point(aes(e_gt[, 1], e_gt[, 2], colour = Error),
                          size = 1,
                          alpha = 0.5) + scale_colour_gradientn(
                            limits = c(0, mx),
                            trans = 'pseudo_log',
                            colours = c("blue", "yellow", "red"),
                            labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                            breaks = c(0, 2, 5, 10, 20, 40, 60)
                          ) + ggtitle('Ground Truth') + theme_cowplot() +
  ylab('') + xlab('') + theme_void()
p2 = qplot() + geom_point(aes(e_ngd[, 1], e_ngd[, 2], colour = unname(ngd_mcr)),
                          size = 1,
                          alpha = 0.5) + scale_colour_gradientn(
                            limits = c(0, mx),
                            trans = 'pseudo_log',
                            colours = c("blue", "yellow", "red"),
                            labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                            breaks = c(0, 2, 5, 10, 20, 40, 60)
                          ) + ggtitle('NGD') + theme_cowplot() +
  ylab('') + xlab('') + theme_void()
p3 = qplot() + geom_point(aes(e_wols[, 1], e_wols[, 2], colour = unname(wols_mcr)),
                          size = 1,
                          alpha = 0.5) + scale_colour_gradientn(
                            limits = c(0, mx),
                            trans = 'pseudo_log',
                            colours = c("blue", "yellow", "red"),
                            labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                            breaks = c(0, 2, 5, 10, 20, 40, 60)
                          ) + ggtitle('WOLS') + theme_cowplot() +
  ylab(combos[2, i]) + xlab(combos[1, i]) + theme_void()
p4 = qplot() + geom_point(aes(e_ols[, 1], e_ols[, 2], colour = unname(ols_mcr)),
                          size = 1,
                          alpha = 0.5) + scale_colour_gradientn(
                            limits = c(0, mx),
                            trans = 'pseudo_log',
                            colours = c("blue", "yellow", "red"),
                            labels = c(' 0', ' 2', ' 5', '10 ', '20 ', '40 ', '60 '),
                            breaks = c(0, 2, 5, 10, 20, 40, 60)
                          ) + ggtitle('OLS') + theme_cowplot() +
  ylab('') + xlab('') + theme_void()
ggarrange(
  p1,
  p2,
  p3,
  p4,
  ncol = 2,
  nrow = 2,
  common.legend = T,
  legend = 'right'
)